/* "Efficiency and water use: Dynamic effects of irrigation technology adoption
by Micah Cameron-Harp and Nathan Hendricks

Code written by Micah Cameron-Harp
May 16th, 2024

This do file creates:
	Table 1 - Contains summary statistics specific to each technology
	Figure 1 - Time series plots of dependant variables by technology
	Figure 2 - Plot of use of irrigation technologies over time
*/

/* NOTE - Global macros for directories are created in the main_text_results.do 
	file which calls this individual .do file */

*Create log file
log using "$dr_output_log\descriptive_stats_plots", replace

********************************************************************************
********************************** Table 1 *************************************
********************************************************************************
use "$dr_temp\wrg_collapsed.dta", replace

*Declare panel structure of data
	xtset WR_GROUP wua_year
	xtdescribe  /* There are 18,541 WR_GROUP's in this dataset only using wuadet_key records with IRR umw_code */

/*Create a irrigation intensity outcome variable */
	bysort WR_GROUP wua_year: gen depth_applied = af_used_irr/acres_irr
	
/* Address missing and extreme values */
	*Drop wr_groups who don't have predevelopment characteristics
	keep if !missing(sy)
	keep if !missing(predev_sat)
	keep if !missing(predev_dtw)
	keep if !missing(hyd_cond)
	*Drop wr_groups if they have missing precip or soil data
	drop if missing(jan_april_mean_ppt)
	drop if missing(awc)
	*Drop wr_groups if they have missing authorized quantities or acreage
	drop if missing(wrg_authquant_IRR_umw) | missing(wrg_authquant_all_umw) | missing(wrg_auth_irr_acres)
	*Drop observations with recorded withdrawals but no irrigated acres and vice-versa
	drop if af_used_irr>0 & acres_irr==0
	drop if acres_irr>0 & af_used_irr==0
	*Drop extreme values for af_used, acres_irr 
		sum af_used, d
			drop if af_used >= r(p99) & af_used < .
		sum acres_irr, d
			drop if acres_irr >= r(p99) & acres_irr < .
		sum depth_applied, d
			drop if depth_applied >= r(p99) & depth_applied < .		
	*Change total precipitation and evapotranspiration variables to inches
	foreach var of varlist total_preseason_ET0_elev total_growseason_ET0_elev ///
		total_preseason_ET0_Har total_growseason_ET0_Har total_preseason_ppt ///
		total_growseason_ppt {
			replace `var' = `var'/25.4
		}
	
/* Label variables */
	label var wua_year "Year"
	label var af_used "Acre-feet applied"
	label var acres_irr "Irrigated acres"
	label var depth_applied "Depth applied - acre-feet applied per acre"
	label var awc "Available water capacity"
	label var sandtotal "Sand content (%)"
	label var silttotal "Silt content (%)"
	label var slope "Slope"
	label var jan_april_mean_ppt "mean monthly preseason precipitation (mm)"
	label var may_sep_mean_ppt "mean monthly growing season precipitation (mm)"
	label var jan_april_mean_ET0_elev "mean daily preseason evapotranspiration, using elevation from ssurgo"
	label var may_sep_mean_ET0_elev "mean daily growing season evapotranspiration, using elevation from ssurgo"
	label var jan_april_mean_ET0_Har "mean daily preseason evapotranspiration"
	label var may_sep_mean_ET0_Har "mean daily growing season evapotranspiration"

*Remove wr_groups with other technologies or with fractional data
*for systems codes we're interested in *
	drop if type_system_2 > 0 | type_system_5 > 0 | type_system_6 > 0 ///
	| type_system_7 > 0 | type_system_8 > 0 | type_system_9 > 0
	*Drop fractional system_codes
		foreach var of varlist type_system_1 type_system_3 type_system_4 {
			keep if `var'==1 | `var'==0
		}

*Check how many groups switch into each final system type multiple times
	gen switch_flood_cp = l.type_system_1==1 & type_system_3==1
		by WR_GROUP: egen num_switch_flood_cp = total(switch_flood_cp)
		tab num_switch_flood_cp
	gen switch_flood_lepa = l.type_system_1==1 & type_system_4==1
		by WR_GROUP: egen num_switch_flood_lepa = total(switch_flood_lepa)
		tab num_switch_flood_lepa
	gen switch_cp_lepa = l.type_system_3==1 & type_system_4==1
		by WR_GROUP: egen num_switch_cp_lepa = total(switch_cp_lepa)
		tab num_switch_cp_lepa
	gen switch_flood_cplepa = l.type_system_1==1 & type_system_3==1
		/* Also include those who switch from flood straight to LEPA */
		replace switch_flood_cplepa = 1 if l.type_system_1==1 & type_system_4==1
		by WR_GROUP: egen num_switch_flood_cplepa = total(switch_flood_cplepa)
		tab num_switch_flood_cplepa

*Create variables showing reversion from one to another, and how many times they revert
	gen reversion_cp_flood = l.type_system_3==1 & type_system_1==1
		by WR_GROUP: egen num_revers_cp_flood = total(reversion_cp_flood)
		tab num_revers_cp_flood
	gen reversion_lepa_flood = l.type_system_4==1 & type_system_1==1
		by WR_GROUP: egen num_revers_lepa_flood = total(reversion_lepa_flood)
		tab num_revers_lepa_flood
	gen reversion_lepa_cp = l.type_system_4==1 & type_system_3==1
		by WR_GROUP: egen num_revers_lepa_cp = total(reversion_lepa_cp)
		tab num_revers_lepa_cp
	gen reversion_cplepa_flood = l.type_system_3==1 & type_system_1==1
		replace reversion_cplepa_flood = 1 if l.type_system_4==1 & type_system_1==1
		by WR_GROUP: egen num_revers_cplepa_flood = total(reversion_cplepa_flood)
		tab num_revers_cplepa_flood

*Drop multiple switchers or reverters
	drop if num_switch_flood_cp>=2
	drop if num_revers_cp_flood>=1
	drop if num_switch_flood_lepa>=2
	drop if num_revers_lepa_flood>=1
	drop if num_switch_cp_lepa>=2
	drop if num_revers_lepa_cp>=1
	drop if num_switch_flood_cplepa>=2
	drop if num_revers_cplepa_flood>=1
		
*Create variable with gmd number
	gen gmd = 0
	forvalues i=1(1)5 {
		replace gmd = `i' if gmd_`i'==1
	}
	
eststo clear

*Create table of summary statistics for all transitions
estpost summarize af_used af_used_irr acres_irr depth_applied ///
	sandtotal silttotal awc ///
	sy predev_dtw predev_sat hyd_cond /// 
	total_preseason_ET0_elev total_growseason_ET0_elev ///
	total_preseason_ppt total_growseason_ppt, d
esttab . using "$dr_output_main/table1_wrg_alltech_sum_stats.rtf" , cells("mean(fmt(%9.2fc)) p50(fmt(%9.2fc)) sd min max") title("Table 1: Summary Statistics") nomtitles label replace wide 

*Now make tables for each technology separately
	*Start with flood 
	preserve
	keep if type_system_1==1
	estpost summarize af_used af_used_irr acres_irr depth_applied, d
	esttab . using "$dr_output_main/table1_wrg_flood_sum_stats.rtf" , cells("mean(fmt(%9.2fc)) p50(fmt(%9.2fc)) sd min max") title("Table 1: Flood Summary Statistics") nomtitles label replace wide 
	restore 

	*Now center-pivot
	preserve
	keep if type_system_3==1
	estpost summarize af_used af_used_irr acres_irr depth_applied, d
	esttab . using "$dr_output_main/table1_wrg_cp_sum_stats.rtf" , cells("mean(fmt(%9.2fc)) p50(fmt(%9.2fc)) sd min max") title("Table 1: Center-pivot Summary Statistics") nomtitles label replace wide 
	restore 
	
	*Now LEPA
	preserve
	keep if type_system_4==1
	estpost summarize af_used af_used_irr acres_irr depth_applied, d
	esttab . using "$dr_output_main/table1_wrg_lepa_sum_stats.rtf" , cells("mean(fmt(%9.2fc)) p50(fmt(%9.2fc)) sd min max") title("Table 1: LEPA Summary Statistics") nomtitles label replace wide 
	restore 

********************************************************************************
********************************** Figure 1 ************************************
********************************************************************************	
use "$dr_temp\wrg_collapsed.dta", replace
		xtset WR_GROUP wua_year
		keep if wua_year>1991
	/* Address missing and extreme values */
		*Drop wr_groups who don't have predevelopment characteristics
		keep if !missing(sy)
		keep if !missing(predev_sat)
		keep if !missing(predev_dtw)
		keep if !missing(hyd_cond)
		*Drop wr_groups if they have missing precip or soil data
		drop if missing(jan_april_mean_ppt)
		drop if missing(awc)
		*Drop wr_groups if they have missing authorized quantities or acreage
		drop if missing(wrg_authquant_IRR_umw) | missing(wrg_authquant_all_umw) | missing(wrg_auth_irr_acres)
		*Drop observations with recorded withdrawals but no irrigated acres and vice-versa
		drop if af_used_irr>0 & acres_irr==0
		drop if acres_irr>0 & af_used_irr==0
		*Create a irrigation intensity outcome variable
		bysort WR_GROUP wua_year: gen depth_applied = af_used_irr/acres_irr
		*Drop extreme values for af_used, acres_irr 
			sum af_used, d
				drop if af_used >= r(p99) & af_used < .
			sum acres_irr, d
				drop if acres_irr >= r(p99) & acres_irr < .
			sum depth_applied, d
				drop if depth_applied >= r(p99) & depth_applied < .		

		/* Label variables */
			label var wua_year "Year"
			label var af_used "Acre-feet applied"
			label var acres_irr "Irrigated acres"
			label var depth_applied "Depth applied - acre-feet applied per acre"
			label var awc "Available water capacity"
			label var sandtotal "Sand content (%)"
			label var silttotal "Silt content (%)"
			label var slope "Slope"
			label var jan_april_mean_ppt "Preseason precipitation (mm)"
			label var may_sep_mean_ppt "Growing season precipitation (mm)"
			label var jan_april_mean_ET0_elev "Preseason evapotranspiration, using elevation from ssurgo"
			label var may_sep_mean_ET0_elev "Growing season evapotranspiration, using elevation from ssurgo"
			label var jan_april_mean_ET0_Har "Preseason evapotranspiration"
			label var may_sep_mean_ET0_Har "Growing season evapotranspiration"

*Recreate type_system variable
gen type_system = . 
forvalues i=1(1)4 {
	replace type_system = `i' if type_system_`i'>0 & !missing(type_system_`i')
}
drop if type_system==2

*Collapse
collapse (mean) af_used_irr acres_irr depth_applied, by(type_system wua_year)

*Af_used_irr
tw (line af_used_irr wua_year if type_system==1, lcolor(navy%100) lpattern(dot) lwidth(1.2)) ///
	(line af_used_irr wua_year if type_system==3, lcolor(magenta%100) lpattern(dash)) ///
	(line af_used_irr wua_year if type_system==4, lcolor(dkorange%100) lpattern(l)), ///
		title("Mean acre-feet withdrawn", size(small)) ///
		xscale(range(1991 2019)) xlabel(1991(3)2019, labsize(vsmall) nogrid) ///
		xtitle("Year", size(vsmall)) ///
		yscale(range(50 300)) ylabel(50(50)300, nogrid) ///
		ytitle("Acre-feet", size(small)) ///
		legend(rows(1) order(1 2 3) ///
		label(1 "Flood irrigation") /// 
		label(2 "Traditional center pivot irrigation") ///
		label(3 "LEPA irrigation") ///
		cols(1) size(tiny)) ///
			graphregion(color(white)) name(af_used, replace)
		
*Acres_irr
tw (line acres_irr wua_year if type_system==1, lcolor(navy%100) lpattern(dot) lwidth(1.2)) ///
	(line acres_irr wua_year if type_system==3, lcolor(magenta%100) lpattern(dash)) ///
	(line acres_irr wua_year if type_system==4, lcolor(dkorange%100) lpattern(l)), ///
		title("Mean acres irrigated", size(small)) ///
		xscale(range(1991 2019)) xlabel(1991(3)2019, labsize(vsmall) nogrid) ///
		yscale(range(50 250)) ylabel(50(50)250, nogrid) ///
		xtitle("Year", size(small)) ///
		ytitle("Acres", size(small)) ///
		legend(off) ///
			graphregion(color(white)) name(acres_irr, replace)
		
*Depth_applied
tw (line depth_applied wua_year if type_system==1, lcolor(navy%100) lpattern(dot) lwidth(1.2)) ///
	(line depth_applied wua_year if type_system==3, lcolor(magenta%100) lpattern(dash)) ///
	(line depth_applied wua_year if type_system==4, lcolor(dkorange%100) lpattern(l)), ///
		title("Mean depth applied", size(small)) ///
		xscale(range(1991 2019)) xlabel(1991(3)2019, labsize(vsmall) nogrid) ///
		yscale(range(.75 1.5)) ylabel(.75(.25)1.5, nogrid) ///
		xtitle("Year", size(small)) ///
		ytitle("Feet", size(small)) ///
		legend(off) ///
			graphregion(color(white)) name(depth_applied, replace)

/* Combine the graphs */
grc1leg2 af_used acres_irr depth_applied, ///
	rows(3) cols(1) legendfrom(af_used) xtob1title xtitlefrom(af_used) ///
	labsize(vsmall) lmsize(tiny) graphregion(color(white)) xsize(6.49) ysize(7.99)
	*Stop, run graph editor, and move "Year" label for x axis above the legend
graph export "$dr_output_main/figure1.tif", replace wid(6500) height(7990)

********************************************************************************
********************************** Figure 2 *************************************
********************************************************************************	
/* Load in the data collapsed to the WR_GROUP level*/
	use "$dr_temp\wrg_collapsed.dta", replace
	*Install cleanplots package
	net install cleanplots, from("https://tdmize.github.io/data/cleanplots") 
	set scheme cleanplots, perm

*Process data
	collapse (sum) flood_acres cp_acres lepa_acres acres_irr, by(wua_year)
	gen other_acres = acres_irr - flood_acres - cp_acres - lepa_acres
	drop acres_irr
	foreach var of varlist flood_acres cp_acres lepa_acres other_acres {
		gen share_`var' = 100*`var'/(flood_acres+cp_acres+lepa_acres+other_acres)
	}
	rename (flood_acres cp_acres lepa_acres other_acres share_flood_acres share_cp_acres share_lepa_acres share_other_acres) ///
		(acres1 acres2 acres3 acres4 share_acres1 share_acres2 share_acres3 share_acres4)
	reshape long acres share_acres, i(wua_year) j(tech)

	gen tech_name = "Flood"
	replace tech_name = "Traditional center pivot" if tech==2
	replace tech_name = "LEPA" if tech==3
	replace tech_name = "Other system" if tech==4

	label define tech_codes 1 "Flood" 2 " Traditional center pivot" 3 "LEPA" 4 "Other system"
	label values tech tech_codes

*Xtset and graph
	xtset tech wua_year
		xtline acres, overlay
			
	*Stack the data 
	gen stack_acres  = .   // generate empty variables
	gen stack_share_acres = .
	sort wua_year tech  // it is import to sort the data here.
	levelsof wua_year, local(years) 
		foreach y of local years {
		summ tech
		// take the first tech as it is
		replace stack_acres = acres if wua_year==`y' & tech==`r(min)'
		// iteratively add up the 
		replace stack_acres = acres + stack_acres[_n-1]  if  wua_year==`y' & tech!=`r(min)'  
		  
		* stack shares  
		replace stack_share_acres = share_acres if  wua_year==`y' & tech==`r(min)' 
		replace stack_share_acres = share_acres + stack_share_acres[_n-1] if  wua_year==`y' & tech!=`r(min)'
	}

*Graph stacked share of acres
	xtline stack_share_acres, overlay 

*Now fill in the regions and fancy graph 
**** get the labels in order
	summ wua_year 
	gen tag = 1 if wua_year==`r(max)'
	sort wua_year tech
	*Generate rank variable 

		*Reverse ordering 
		sort wua_year tech
		summ tech  
		gen tech2 = `r(max)' + 1 - tech  // reverse the ordering
	sort wua_year tech
	gen labely = .
	 replace labely =  stack_share_acres / 2           if tech==1 & tag==1
	 replace labely = (stack_share_acres + stack_share_acres[_n-1]) / 2  if tech!=1 & tag==1
	egen rank = rank(share_acres)   if tag==1 & share_acres > 0 & share_acres!=., f 
	 
	format share_acres %9.1f
	gen labelval = tech_name + " (" + string(share_acres, "%9.1fc") + "%)"   if rank <= 10
	levelsof tech2, local(levels)
	local items = `r(r)'
	 
	foreach x of local levels {
	display "`x'"
	local x2 = `x' + 2
	display "`x2'"
	colorpalette mrc, n(7) nograph
	 local stacklines2 `stacklines2' area stack_share_acres wua_year if tech2 == `x', fcolor("`r(p`x2')'") lcolor(black) lwidth(*0.2) ||
	 }
	summ wua_year 
	local x1 = `r(min)'
	local x2 = `r(max)'
	summ wua_year
	summ acres if wua_year==`r(max)'
	  local ctot = `r(sum)'
	  local ctot : di %7.0fc `ctot'
  
*Graph
graph twoway `stacklines2' ///
 (scatter labely wua_year if tag==1, ms(smcircle) msize(0.2) mcolor(black%20) mlabel(labelval) mlabsize(small) mlabcolor(black) ) ///
 , ///
  legend(off) ///
  ytitle("Percent of irrigated acreage", size(small)) ///
  ylabel(, labsize(small) nogrid) ///
  xtitle("") ///
  xlabel(`x1'(1)`x2', labsize(small) angle(vertical) nogrid) ///
  xscale(range(1991 2025)) ///
  title("Share of irrigated acreage by system") ///
  graphregion(margin(0 1 .5 .5))
graph export "$dr_output_main/figure2.tif", replace wid(6500) height(3100)

*Close log
	log close